Setup

Install and Attach

#' Attach a package, installing it first when it is not available.
#'
#' @param pkg Package name as a single character string.
#' @return The value of `library()`, returned invisibly.
libinstall <- function(pkg) {
    # requireNamespace() only checks availability; unlike require() it does
    # not attach the package or warn when it is missing.
    if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
    }
    # library() raises an error if the package still cannot be loaded.
    library(pkg, character.only = TRUE)
}

# Attach every package the analysis relies on, installing any that are missing.
libinstall("tidyverse")
libinstall("glue")
libinstall("readr")
libinstall("plotly")
libinstall("readxl")
libinstall("lubridate")
libinstall("curl")
libinstall("epidata")
# priceR is called through priceR:: when the minimum wage is adjusted for
# inflation, so it has to be installed as well.
libinstall("priceR")

Import Data

Import cpsaat Data

# The downloads that follow need network access. Say why before ending the
# session instead of exiting without any diagnostic.
if (!curl::has_internet()) {
    message("No internet connection available; quitting.")
    quit()
}

# Fetch the cpsaat11 workbook into a temporary file
tmp <- tempfile()
curl_download(url = "https://www.bls.gov/cps/cpsaat11.xlsx", destfile = tmp)

# Read the sheet: skip the first 7 rows, supply our own column names and
# types, and treat the en dash as the missing-value marker
cpsaat11 <- read_excel(
    path = tmp,
    skip = 7,
    na = "–",
    col_names = c(
        "Occupation",
        "Total",
        "Women",
        "White",
        "Black/African American",
        "Asian",
        "Hispanic/Latino"
    ),
    col_types = c(
        Occupation = "text",
        Total = "numeric",
        "Women" = "numeric",
        "White" = "numeric",
        "Black/African American" = "numeric",
        "Asian" = "numeric",
        "Hispanic/Latino" = "numeric"
    )
)
# Discard rows that carry no occupation label
cpsaat11 <- drop_na(cpsaat11, Occupation)
# The downloaded file is no longer needed once the data is in memory
file.remove(tmp)
## [1] TRUE
# Drop the temporary path variable from the workspace
rm(list = "tmp")

Import EPI Data

Get the data from EPI

# Labor force participation rates from the epidata package, requested with
# by = "gr" (presumably gender and race; the printed columns combine both)
Labor_force_participation <- epidata::get_labor_force_participation_rate(by = "gr")

# Median and mean hourly wages, requested with the same by = "gr" breakdown
Medianaverage_hourly_wages <- epidata::get_median_and_mean_wages(by = "gr")

# Minimum wage series
Minimum_wage <- epidata::get_minimum_wage()

Clean Data

Clean cpsaat11

# Print the tibble to inspect the imported columns and types
cpsaat11
## # A tibble: 596 x 7
##    Occupation         Total Women White `Black/African A~ Asian `Hispanic/Latin~
##    <chr>              <dbl> <dbl> <dbl>             <dbl> <dbl>            <dbl>
##  1 Total, 16 years ~ 147795  46.8  78                12.1   6.4             17.6
##  2 Management, prof~  63644  51.7  78.7               9.7   8.6             10.4
##  3 Management, busi~  27143  44.6  81.7               8.8   6.7             10.9
##  4 Management occup~  18564  40.4  83.4               8     5.8             10.7
##  5 Chief executives    1669  29.3  88                 4.3   5.4              7.4
##  6 General and oper~   1057  30.5  84.4               7.1   4.5             12.4
##  7 Legislators           25  NA    NA                NA    NA               NA  
##  8 Advertising and ~     56  52.1  80.5              14.7   3.9              3.5
##  9 Marketing manage~    554  60.7  84.1               5.5   7.6              9.9
## 10 Sales managers       521  30.9  87.6               5.8   4.2              7.6
## # ... with 586 more rows

Looks fine.

Clean Labor_force_participation

# Print the wide table (one column per series) before reshaping it
Labor_force_participation
## # A tibble: 513 x 13
##    date         all women   men black black_women black_men hispanic
##    <date>     <dbl> <dbl> <dbl> <dbl>       <dbl>     <dbl>    <dbl>
##  1 1978-01-01    NA    NA    NA    NA          NA        NA       NA
##  2 1978-02-01    NA    NA    NA    NA          NA        NA       NA
##  3 1978-03-01    NA    NA    NA    NA          NA        NA       NA
##  4 1978-04-01    NA    NA    NA    NA          NA        NA       NA
##  5 1978-05-01    NA    NA    NA    NA          NA        NA       NA
##  6 1978-06-01    NA    NA    NA    NA          NA        NA       NA
##  7 1978-07-01    NA    NA    NA    NA          NA        NA       NA
##  8 1978-08-01    NA    NA    NA    NA          NA        NA       NA
##  9 1978-09-01    NA    NA    NA    NA          NA        NA       NA
## 10 1978-10-01    NA    NA    NA    NA          NA        NA       NA
## # ... with 503 more rows, and 5 more variables: hispanic_women <dbl>,
## #   hispanic_men <dbl>, white <dbl>, white_women <dbl>, white_men <dbl>
# Reshape to one row per date and series, then split the series name
# (e.g. "black_women") into a race part and a gender part.
Participation <- Labor_force_participation %>%
    pivot_longer(
        -date,
        names_to = "Race",
        values_to = "Participation",
        values_drop_na = TRUE
    ) %>%
    # Single-word names ("all", "women", "black", ...) have no second piece.
    # fill = "right" states explicitly that Gender is NA for them, which gives
    # the same result as the default without the "Expected 2 pieces" warning.
    separate(Race, into = c("Race", "Gender"), fill = "right")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
# Series named only "women" or "men" end up in Race after the split. Move
# that label into Gender and leave Race missing, then recombine with the
# rows that were already correct.
Participation <- Participation %>%
    filter(grepl("women|men", Race, ignore.case = TRUE)) %>%
    mutate(
        Gender = Race,
        Race = NA_character_
    ) %>%
    union(
        filter(Participation, !grepl("women|men", Race, ignore.case = TRUE))
    )
# Preview the rows that still carry a race label
Participation %>%
    filter(!is.na(Race))
## # A tibble: 5,020 x 4
##    date       Race     Gender Participation
##    <date>     <chr>    <chr>          <dbl>
##  1 1978-12-01 all      <NA>           0.634
##  2 1978-12-01 black    <NA>           0.617
##  3 1978-12-01 black    women          0.535
##  4 1978-12-01 black    men            0.718
##  5 1978-12-01 hispanic <NA>           0.633
##  6 1978-12-01 hispanic women          0.47 
##  7 1978-12-01 hispanic men            0.812
##  8 1978-12-01 white    <NA>           0.635
##  9 1978-12-01 white    women          0.499
## 10 1978-12-01 white    men            0.785
## # ... with 5,010 more rows
# The wide source table is no longer needed after reshaping
rm(list = "Labor_force_participation")

Clean Medianaverage_hourly_wages

# Print the wide table (one column per series) before reshaping it
Medianaverage_hourly_wages
## # A tibble: 47 x 25
##     date median average men_median men_average women_median women_average
##    <dbl>  <dbl>   <dbl>      <dbl>       <dbl>        <dbl>         <dbl>
##  1  1973   17.3    20.1       21.0        23.6         13.2          15.1
##  2  1974   16.9    19.7       20.7        23.1         13            14.9
##  3  1975   16.9    19.8       21.0        23.1         13.2          15.1
##  4  1976   16.9    20.0       20.7        23.4         13.3          15.4
##  5  1977   16.9    19.9       20.9        23.4         13.2          15.2
##  6  1978   17.1    19.9       21.2        23.5         13.2          15.3
##  7  1979   16.8    20.1       21.1        23.7         13.4          15.5
##  8  1980   16.7    19.7       20.9        23.2         13.3          15.3
##  9  1981   16.5    19.6       20.4        23.0         13.4          15.3
## 10  1982   16.4    19.8       20.4        23.2         13.2          15.6
## # ... with 37 more rows, and 18 more variables: white_median <dbl>,
## #   white_average <dbl>, black_median <dbl>, black_average <dbl>,
## #   hispanic_median <dbl>, hispanic_average <dbl>, white_men_median <dbl>,
## #   white_men_average <dbl>, black_men_median <dbl>, black_men_average <dbl>,
## #   hispanic_men_median <dbl>, hispanic_men_average <dbl>,
## #   white_women_median <dbl>, white_women_average <dbl>,
## #   black_women_median <dbl>, black_women_average <dbl>,
## #   hispanic_women_median <dbl>, hispanic_women_average <dbl>
# Reshape to one row per year and series, then split names such as
# "white_men_median" into Race, Gender and Summary. fill = "left" pads
# shorter names with NA on the left so the last piece always lands in Summary.
Wages <- Medianaverage_hourly_wages %>%
    pivot_longer(
        -date,
        names_to = "Race",
        values_to = "Wage",
        values_drop_na = TRUE
    ) %>%
    separate(Race, into = c("Race", "Gender", "Summary"), fill = "left")

# With left padding, a two-piece name such as "white_median" puts the race
# into Gender. Wherever Gender is not "women"/"men" (including NA), move it
# to Race and blank Gender, then recombine with the rows that were correct.
Wages <- Wages %>%
    filter(!grepl("women|men", Gender, ignore.case = TRUE)) %>%
    mutate(
        Race = Gender,
        Gender = NA_character_
    ) %>%
    union(
        filter(Wages, grepl("women|men", Gender, ignore.case = TRUE))
    )
# No need to keep average and median on separate rows: spread Summary into
# one column per statistic
Wages <- Wages %>%
    pivot_wider(names_from = Summary, values_from = Wage)
rm(Medianaverage_hourly_wages)

Clean Minimum_wage

# Convert the real minimum wage from 2018 dollars to 2019 US dollars so it
# shares a common base year
Minimum_wage <- mutate(
    Minimum_wage,
    Min2019 = priceR::adjust_for_inflation(
        federal_minimum_wage_real_x_2018_dollars,
        2018,
        "US",
        2019
    )
)
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US 
## Generating URL to request all 61 results
# Keep the inflation-adjusted wage, the nominal wage (renamed to MinCur)
# and the date, in that order
Minimum_wage <- select(
    Minimum_wage,
    Min2019,
    MinCur = federal_minimum_wage_nominal_dollars,
    date
)

Fix inconsistent case

# Capitalise the lower-case column names so all three tables follow the
# same naming convention
Wages <- rename(Wages, Date = date, Median = median, Average = average)

Participation <- rename(Participation, Date = date)

Minimum_wage <- rename(Minimum_wage, Date = date)

Graph

Average and Median Wage over Time by Race and Gender

# Average wage per race over time, one panel per gender. The minimum wage in
# 2019 dollars is drawn from its own table as a thicker line with the colour
# mapping switched off.
g <- ggplot(Wages, aes(col = Race, x = Date)) +
    geom_line(aes(y = Average)) +
    geom_line(data = Minimum_wage, aes(y = Min2019, col = NULL), size = 2) +
    facet_wrap(~Gender)
ggplotly(g)
# Same layout, showing the median wage instead of the average
g <- ggplot(Wages, aes(col = Race, x = Date)) +
    geom_line(aes(y = Median)) +
    geom_line(data = Minimum_wage, aes(y = Min2019, col = NULL), size = 2) +
    facet_wrap(~Gender)
ggplotly(g)

Scatter Plot over Time

# Median against average wage, coloured by race and shaped by gender.
# frame is not a ggplot2 aesthetic (hence the warning); it is kept in the
# mapping so it is available to ggplotly().
g <- ggplot(Wages) +
    geom_point(
        mapping = aes(
            x = Median,
            y = Average,
            col = Race,
            shape = Gender,
            frame = Date
        )
    ) +
    ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
# Convert the ggplot object to an interactive plotly widget
ggplotly(g)